#######################################################################
# REDUCED FORM SIMULATIONS: EXCLUSION RESTRICTION (DV: COUNT VICTIMS) #
#######################################################################
# Author: Kasia Nalewajko
# First created: 15 November 2023
# Replicated: 10 June 2024

rm(list = ls())

# LOAD PACKAGES -----------------------------------------------------------

if (!require("dplyr")) install.packages("dplyr")
if (!require("modelsummary")) install.packages("modelsummary")
if (!require("estimatr")) install.packages("estimatr")
if (!require("ggplot2")) install.packages("ggplot2")

# LOAD DATA ---------------------------------------------------------------

load("./00 SUBMITTED/00 APSR final/04 replication_files/01 data/main.Rda")

# RUN THE MODELS -------

# Choose 25% observations at random to check whether the result is due to chance and then plot the p values --------

# DV: Logged number of victims

models_list <- list()

simulation_id <- rep(NA, 1500)
p <- rep(NA, 1500)
nobs <- rep(NA, 1500)

# Loop over outcomes
for (i in 1:1500) {
  
  set.seed(i)
  
  models_list[[i]] <-  main %>% 
    dplyr::sample_frac(0.25) %>% 
    lm_robust(formula = log(all_jewish_victims+1) ~ log(mpf1000+1) + log(pop1936+1) + synagogues_dummy  
              + area_sqkm  + longitude + longitudesq + latitude + latitudesq + zone5,
              clusters = id_bureau,
              fixed_effects = as.factor(ar_name)
    )
  
  simulation_id[[i]] <- i
  p[[i]] <- models_list[[i]][["p.value"]][["log(mpf1000 + 1)"]]
  nobs[[i]] <- models_list[[i]][["nobs"]]
  
  print(i)
}

# PUT TOGETHER DATA FRAME FOR PLOTTING -------

pvalues_simulatedcount <- data.frame(
  simulation_id = simulation_id,
  p = p,
  nobs = nobs)

pvalues_simulatedcount %>% 
  filter(p < 0.099)


# PLOT ---------------------------------------------------------------

pvalues_simulatedcount %>% 
  ggplot() +
  aes(x = p) %>% 
  geom_histogram(binwidth = 0.01) +
  geom_vline(aes(xintercept = 0.05), linetype = 1, color = "gray20") +
  geom_text(aes(x=0.05, label="0.05\n", y=400), colour="blue", angle=90, text=element_text(size=11)) +
  theme_bw() +
  labs(
    subtitle = "",
    x = "P-values",
    y = "Count regressions"
  )


# SAVE ---------------------------------------------------------------

ggsave(
  "A9_pvalues_reduced_simulatedcount.png",
  plot = last_plot(),
  path = "./00 SUBMITTED/00 APSR final/03 dataverse_online_appendix/figures",
  width = 21,
  height = 18,
  units = "cm",
  dpi = 300
)
 
